home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Libris Britannia 4
/
science library(b).zip
/
science library(b)
/
PROGRAMM
/
CC_C
/
0566.ZIP
/
BAWK.C
< prev
next >
Wrap
Text File
|
1986-03-27
|
12KB
|
640 lines
/*
* Bawk main program
*/
#define MAIN 1
#include <stdio.h>
#include "bawk.h"
/*
* Main program
*/
int
main( argc, argv )
int argc;
char **argv;
{
    /*
     * Program entry point.
     * The first file argument on the command line is compiled as the
     * rules file; every later file argument is processed as a text
     * file.  A lone "-" stands for standard input in either position.
     * If no text file was named, standard input is processed.
     * The BEGIN action (if any) is run once before the first text file
     * and the END action (if any) once after the last one.
     * Returns 0 on normal completion; error paths leave through
     * usage() / error().
     */
    char gotrules, didfile, getstdin;

    getstdin = 0;
    didfile = 0;
    gotrules = 0;

    /*
     * Initialize global variables.  Each one is cleared on its own
     * statement so that pointers and integers never share one
     * chained assignment.
     */
    Beginact = 0;
    Endact = 0;
    Rules = 0;
    Rulep = 0;
#ifdef DEBUG
    Debug = 0;
#endif
    Filename = 0;
    Linecount = 0;
    Saw_break = 0;
    Stackptr = Stackbtm - 1;
    Stacktop = Stackbtm + MAXSTACKSZ;
    Nextvar = Vartab;
    strcpy( Fieldsep, " \t" );
    strcpy( Recordsep, "\n" );

    /*
     * Parse command line.  "> 0" keeps the loop from running away
     * if the environment hands us an argc of zero.
     */
    while ( --argc > 0 )
    {
        if ( **(++argv) == '-' )
        {
            /*
             * Process dash options.  Note that the argument pointer
             * itself is advanced past the dash.
             */
            switch ( tolower( *(++(*argv)) ) )
            {
#ifdef DEBUG
            case 'd':
                ++Debug;
                break;
#endif
            case 0:
                /*
                 * A lone "-" means standard input.  argv is left
                 * where it is: stepping it back here would make the
                 * next iteration revisit this same (now empty)
                 * argument and try to open "" as a file.
                 */
                getstdin = 1;
                goto dosomething;
            default:
                usage();
            }
        }
        else
        {
dosomething:
            if ( gotrules )
            {
                /*
                 * Already read rules file - assume this is
                 * a text file for processing.  The BEGIN action
                 * runs only ahead of the first text file.
                 */
                if ( !didfile )
                {
                    didfile = 1;
                    if ( Beginact )
                        doaction( Beginact );
                }
                if ( getstdin )
                {
                    getstdin = 0;
                    newfile( 0 );
                }
                else
                    newfile( *argv );
                process();
            }
            else
            {
                /*
                 * First file name argument on command line
                 * is assumed to be a rules file - attempt to
                 * compile it.
                 */
                if ( getstdin )
                {
                    getstdin = 0;
                    newfile( 0 );
                }
                else
                    newfile( *argv );
                compile();
                gotrules = 1;
            }
        }
    }

    if ( !gotrules )
        usage();

    if ( !didfile )
    {
        /*
         * Didn't process any files yet - process stdin.
         */
        newfile( 0 );
        if ( Beginact )
            doaction( Beginact );
        process();
    }

    if ( Endact )
        doaction( Endact );

    return 0;
}
/*
* Regular expression/action file compilation routines.
*/
compile()
{
    /*
     * Read the rules file that is currently open on "Fileptr" and
     * build the chain of rule structures starting at "Rules".
     * Input is consumed one item at a time:
     *   whitespace    - skipped
     *   '#'           - comment, skipped through end of line
     *   '{'           - an action, attached to the current rule
     *   ','           - the stop half of a two-part pattern
     *   anything else - a start pattern (or the BEGIN / END keyword,
     *                   whose action goes to Beginact / Endact)
     * Each compiled item is produced in the global "Workbuf" and then
     * copied into freshly allocated memory.  The input file is closed
     * when end of file is reached.
     */
    int ch, nbytes;

#ifdef DEBUG
    if ( Debug )
        error( "compiling...", 0 );
#endif

    for ( ;; )
    {
        ch = getcharacter();
        if ( ch == -1 )
            break;

        /* swallow whitespace */
        if ( ch==' ' || ch=='\t' || ch=='\n' )
            continue;

        if ( ch=='#' )
        {
            /*
             * Swallow comments: discard up to and including the
             * newline (or stop at end of file).
             */
            do
                ch = getcharacter();
            while ( ch != -1 && ch != '\n' );
            continue;
        }

        if ( ch=='{' )
        {
#ifdef DEBUG
            if ( Debug )
                error( "action", 0 );
#endif
            /*
             * Put the brace back so the action compiler sees the
             * whole action, then tokenize it into the work buffer.
             */
            ungetcharacter( '{' );
            nbytes = act_compile( Workbuf );

            if ( !Rulep )
            {
                /*
                 * First action encountered and no pattern before
                 * it: start the rule chain.
                 */
                Rules = Rulep = getmem( sizeof( *Rulep ) );
                fillmem( Rulep, sizeof( *Rulep ), 0 );
            }
            else if ( Rulep->action )
            {
                /*
                 * The current rule already owns an action, so this
                 * one opens a new rule at the end of the chain.
                 */
                Rulep->nextrule = getmem( sizeof( *Rulep ) );
                Rulep = Rulep->nextrule;
                fillmem( Rulep, sizeof( *Rulep ), 0 );
            }

            Rulep->action = getmem( nbytes );
            movemem( Workbuf, Rulep->action, nbytes );
            continue;
        }

        if ( ch==',' )
        {
#ifdef DEBUG
            if ( Debug )
                error( "stop pattern", 0 );
#endif
            /*
             * The comma has been consumed; what follows should be
             * the stop half of a start,stop pattern pair.
             */
            if ( !Rulep || !Rulep->pattern.start )
                error( "stop pattern without a start",
                    RE_ERROR );
            if ( Rulep->pattern.stop )
                error( "already have a stop pattern",
                    RE_ERROR );
            nbytes = pat_compile( Workbuf );
            Rulep->pattern.stop = getmem( nbytes );
            movemem( Workbuf, Rulep->pattern.stop, nbytes );
            continue;
        }

        /*
         * Anything else is taken to begin a regular expression
         * pattern.
         */
#ifdef DEBUG
        if ( Debug )
            error( "start pattern", 0 );
#endif
        ungetcharacter( ch );
        nbytes = pat_compile( Workbuf );

        if ( *Workbuf == T_BEGIN )
        {
            /*
             * "BEGIN" keyword: the action that follows goes into
             * the special "Beginact" buffer, not into a rule.
             */
            nbytes = act_compile( Workbuf );
            Beginact = getmem( nbytes );
            movemem( Workbuf, Beginact, nbytes );
            continue;
        }

        if ( *Workbuf == T_END )
        {
            /*
             * "END" keyword: the action that follows goes into
             * the special "Endact" buffer, not into a rule.
             */
            nbytes = act_compile( Workbuf );
            Endact = getmem( nbytes );
            movemem( Workbuf, Endact, nbytes );
            continue;
        }

        if ( Rulep )
        {
            /*
             * A rule already exists - every new start pattern
             * opens another rule at the end of the chain.
             */
            Rulep->nextrule = getmem( sizeof( *Rulep ) );
            Rulep = Rulep->nextrule;
            fillmem( Rulep, sizeof( *Rulep ), 0 );
        }
        else
        {
            /*
             * First pattern encountered: start the rule chain.
             */
            Rules = Rulep = getmem( sizeof( *Rulep ) );
            fillmem( Rulep, sizeof( *Rulep ), 0 );
        }

        if ( Rulep->pattern.start )
            error( "already have a start pattern",
                RE_ERROR );
        Rulep->pattern.start = getmem( nbytes );
        movemem( Workbuf, Rulep->pattern.start, nbytes );
    }

    endfile();
}
/*
* Text file main processing loop.
*/
process()
{
    /*
     * Read a record at a time from the current input file at
     * "Fileptr", split it into fields, then apply each rule in the
     * Rules chain to it:
     *   - a rule without a start pattern fires on every record;
     *   - a rule with only a start pattern fires on matching records;
     *   - a rule with start and stop patterns fires on every record
     *     from a start match through the next stop match.
     * The rule chain may be empty (a rules file holding nothing but
     * BEGIN / END actions); records are then read and discarded.
     */
#ifdef DEBUG
    int i;

    if ( Debug )
        error( "processing...", 0 );
#endif
    Recordcount = 0;

    while ( getline() )
    {
        /*
         * Parse the input line.
         */
        Fieldcount = parse( Linebuf, Fields, Fieldsep );
#ifdef DEBUG
        if ( Debug>1 )
        {
            printf( "parsed %d words:\n", Fieldcount );
            for(i=0; i<Fieldcount; ++i )
                printf( "<%s>\n", Fields[i] );
        }
#endif
        /*
         * Walk the chain with a pre-tested loop so that an empty
         * chain (Rules == 0) is never dereferenced.
         */
        for ( Rulep = Rules; Rulep; Rulep = Rulep->nextrule )
        {
            if ( ! Rulep->pattern.start )
            {
                /*
                 * No pattern given - perform action on
                 * every input line.
                 */
                doaction( Rulep->action );
            }
            else if ( Rulep->pattern.startseen )
            {
                /*
                 * Start pattern already found - perform
                 * action then check if line matches
                 * stop pattern.
                 */
                doaction( Rulep->action );
                if ( dopattern( Rulep->pattern.stop ) )
                    Rulep->pattern.startseen = 0;
            }
            else if ( dopattern( Rulep->pattern.start ) )
            {
                /*
                 * Matched start pattern - perform action.
                 * If a stop pattern was given, set "start
                 * pattern seen" flag and process every input
                 * line until stop pattern found.
                 */
                doaction( Rulep->action );
                if ( Rulep->pattern.stop )
                    Rulep->pattern.startseen = 1;
            }
        }

        /*
         * Release memory allocated by parse().
         */
        while ( Fieldcount )
            free( Fields[ --Fieldcount ] );
    }
}
/*
* Miscellaneous functions
*/
int
parse( str, wrdlst, delim )
char *str;
char *wrdlst[];
char *delim;
{
    /*
     * Parse the string of words in "str" into the word list at "wrdlst".
     * A "word" is a sequence of characters delimited by one or more
     * of the characters found in the string "delim".
     * Returns the number of words parsed.
     * A word longer than MAXLINELEN-1 characters is truncated to that
     * length so that it always fits, terminator included, in the
     * buffer allocated for it.
     * CAUTION: the memory for the words in "wrdlst" is allocated
     * through getmem() and should eventually be returned by free()...
     * NOTE(review): the capacity of "wrdlst" is not known here and is
     * not checked - confirm callers supply a large enough list.
     */
    int wrdcnt, wrdlen;
    char *word;

    wrdcnt = 0;
    while ( *str )
    {
        /* Skip the delimiters in front of the next word. */
        while ( instr( *str, delim ) )
            ++str;
        if ( !*str )
            break;

        /*
         * NOTE: allocate a MAXLINELEN sized buffer for every
         * word, just in case user wants to copy a larger string
         * into a field.
         */
        word = getmem( MAXLINELEN );

        /*
         * Copy the word straight into its buffer.  Characters that
         * would not fit are consumed but dropped.
         */
        wrdlen = 0;
        while ( *str && !instr( *str, delim ) )
        {
            if ( wrdlen < MAXLINELEN - 1 )
                word[ wrdlen++ ] = *str;
            ++str;
        }
        word[ wrdlen ] = 0;

        wrdlst[ wrdcnt++ ] = word;
    }
    return wrdcnt;
}
unparse( wrdlst, wrdcnt, str, delim )
char *wrdlst[];
int wrdcnt;
char *str;
char *delim;
{
    /*
     * Replace all the words in "str" with the words in "wrdlst",
     * maintaining the same word separation distance as found in
     * the string.
     * A "word" is a sequence of characters delimited by one or more
     * of the characters found in the string "delim".
     * If "str" holds more words than "wrdcnt", the surplus words are
     * removed (their delimiters are kept).
     * The rebuilt string is limited to MAXLINELEN characters; anything
     * beyond that is dropped rather than written past the local
     * buffer.
     * NOTE(review): the result is copied back into "str" and may be
     * longer than the original text - confirm the caller's buffer can
     * hold MAXLINELEN characters plus the terminator.
     */
    int wc;
    char strbuf[ MAXLINELEN + 1 ], *sp, *wp, *start, *limit;

    wc = 0;                         /* next word in "wrdlst" */
    sp = strbuf;                    /* points to our local string */
    limit = strbuf + MAXLINELEN;    /* slot reserved for the terminator */
    start = str;                    /* save start address of "str" for later... */

    while ( *str )
    {
        /*
         * Copy the field delimiters from the original string to
         * our local version.
         */
        while ( instr( *str, delim ) )
        {
            if ( sp < limit )
                *sp++ = *str;
            ++str;
        }
        if ( !*str )
            break;
        /*
         * Skip over the field in the original string and...
         */
        while ( *str && !instr( *str, delim ) )
            ++str;
        if ( wc < wrdcnt )
        {
            /*
             * ...copy in the field in the wordlist instead.
             */
            wp = wrdlst[ wc++ ];
            while ( *wp && sp < limit )
                *sp++ = *wp++;
        }
    }

    /*
     * Tie off the local string, then copy it back to caller's string.
     */
    *sp = 0;
    strcpy( start, strbuf );
}
instr( c, s )
char c, *s;
{
    /*
     * Return 1 if the character "c" occurs in the string "s",
     * 0 otherwise.  The string terminator is never matched, so a
     * "c" of zero always yields 0.
     */
    for ( ; *s; ++s )
        if ( *s == c )
            return 1;
    return 0;
}
char *
getmem( len )
unsigned len;
{
    /*
     * Allocate "len" bytes with malloc() and return the address.
     * If the allocation fails, "out of memory" is reported through
     * error() with severity MEM_ERROR; no value is returned on that
     * path.
     */
    char *block;

    block = malloc( len );
    if ( !block )
        error( "out of memory", MEM_ERROR );
    else
        return block;
}
char *
newfile( s )
char *s;
{
    /*
     * Make "s" the current input file: reset the line counter, record
     * the name in "Filename" and open the file on "Fileptr".
     * A null "s" selects standard input, which is reported under the
     * name "standard input".
     * A file that cannot be opened is reported through error() with
     * severity FILE_ERROR.
     * NOTE: although declared to return a pointer, this function
     * returns no value.
     */
    Linecount = 0;
    Filename = s;

    if ( !s )
    {
        /*
         * No file name given - process standard input.
         */
        Fileptr = stdin;
        Filename = "standard input";
    }
    else
    {
#ifdef BDS_C
        if ( fopen( s, Fileptr = Curfbuf ) == -1 )
#else
        Fileptr = fopen( s, "r" );
        if ( !Fileptr )
#endif
            error( "file not found", FILE_ERROR );
    }
}
getline()
{
/*
* Read a line of text from current input file. Strip off
* trailing record seperator (newline).
*/
int rtn, len;
for ( len=0; len<MAXLINELEN; ++len )
{
if ( (rtn = getcharacter()) == *Recordsep || rtn == -1 )
break;
Linebuf[ len ] = rtn;
}
Linebuf[ len ] = 0;
if ( rtn == -1 )
{
endfile();
return 0;
}
return 1;
}
getcharacter()
{
    /*
     * Fetch the next character from the current input file and keep
     * the record and line counters up to date.  Returns the character
     * read, or -1 at end of file.
     * WARNING: your getc() must convert lines that end with CR+LF
     * to LF and CP/M's EOF character (^Z) to a -1.
     * Also, getc() must return a -1 when attempting to read from
     * an unopened file.
     */
    int ch;

#ifdef BDS_C
    /*
     * BDS C doesn't do CR+LF to LF and ^Z to -1 conversions, so
     * they are done by hand here.
     */
    ch = getc( Fileptr );
    if ( ch == 26 )     /* ^Z */
        ch = -1;
    else if ( ch == '\r' )
    {
        /*
         * A CR followed by LF collapses to the LF alone; a CR
         * followed by anything else stays a CR and the other
         * character is pushed back.
         */
        ch = getc( Fileptr );
        if ( ch != '\n' )
        {
            ungetc( ch );
            ch = '\r';
        }
    }
#else
    ch = getc( Fileptr );
#endif

    if ( ch == '\n' )
        ++Linecount;
    if ( ch == *Recordsep )
        ++Recordcount;
    return ch;
}
ungetcharacter( c )
{
    /*
     * Return the character "c" to the current input file so that the
     * next read sees it again.  The counting done by getcharacter()
     * is undone: the line counter drops for a newline and the record
     * counter drops for a record separator.
     * Returns whatever ungetc() returns.
     */
    if ( c=='\n' )
        --Linecount;
    if ( c == *Recordsep )
        --Recordcount;
    return ungetc( c, Fileptr );
}
endfile()
{
    /*
     * Close the current input file and clear the file name and line
     * counter, so that error() no longer prefixes its messages with
     * them.
     */
    fclose( Fileptr );
    Linecount = 0;
    Filename = 0;
}
error( s, severe )
char *s;
int severe;
{
    /*
     * Write the message "s" to stderr, prefixed with the current file
     * name and line number when those are set.  A non-zero "severe"
     * terminates the program with that value as the exit status;
     * zero just prints the message and returns to the caller.
     */
    if ( Filename )
        fprintf( stderr, "%s:", Filename );

    if ( Linecount )
        fprintf( stderr, " line %d:", Linecount );

    fprintf( stderr, " %s\n", s );

    if ( severe != 0 )
        exit( severe );
}
usage()
{
error( "Usage: bawk <actfile> [<file> ...]\n", USAGE_ERROR );
}
movemem( from, to, count )
char *from, *to;
int count;
{
    /*
     * Copy "count" bytes from "from" to "to", lowest address first.
     * A count of zero or less copies nothing.
     */
    int i;

    for ( i = 0; i < count; ++i )
        to[ i ] = from[ i ];
}
fillmem( array, count, value )
char *array, value;
int count;
{
    /*
     * Store "value" into each of the first "count" bytes at "array".
     * A count of zero or less stores nothing.
     */
    int i;

    for ( i = 0; i < count; ++i )
        array[ i ] = value;
}
int
strncmp( s, t, n )
char *s, *t;
int n;
{
    /*
     * Compare at most "n" characters of the strings "s" and "t".
     * Returns 0 when they agree over that span (or both end first),
     * otherwise the difference between the first pair of characters
     * that differ.
     * A count of zero or less compares nothing and returns 0.
     */
    if ( n <= 0 )
        return 0;

    /*
     * Step over matching characters, stopping on the n'th character
     * at the latest; the pair the loop stops on decides the result.
     */
    while ( --n>0 && *s && *t && *s==*t )
    {
        ++s;
        ++t;
    }
    return *s - *t;
}
num( c )
char c;
{
    /*
     * Return 1 if "c" is one of the characters '0' through '9',
     * 0 otherwise.
     */
    if ( c < '0' )
        return 0;
    if ( c > '9' )
        return 0;
    return 1;
}
alpha( c )
char c;
{
    /*
     * Return 1 if "c" is a lower or upper case letter or an
     * underscore, 0 otherwise.
     */
    if ( c == '_' )
        return 1;
    if ( c >= 'a' && c <= 'z' )
        return 1;
    if ( c >= 'A' && c <= 'Z' )
        return 1;
    return 0;
}
alphanum( c )
char c;
{
    /*
     * Return 1 if "c" is accepted by alpha() or by num(),
     * 0 otherwise.
     */
    if ( alpha( c ) )
        return 1;
    if ( num( c ) )
        return 1;
    return 0;
}